Note this script doesn’t use clustered SEs and it uses the full dataframe including defendants who did not receive prison times. Its purpose was to identify relevant correlations between judge/defendant demographics and sentencing outcomes.

Set up

#load packages
require(ggplot2)
require(dplyr)
require(tidyr)
require(readxl)
require(openxlsx)
require(broom)
require(stringr)
#load short data frame: case x defendant level
df_short <- readxl::read_excel('../01_preprocessed_data/hidden/full_short.xlsx') %>%
  mutate(punishment_prison_days_conditiional = as.numeric(punishment_prison_days_conditiional),
         case_district_court = relevel(factor(case_district_court), "Oslo tingrett"))#reorder factor for regr. analysis
### FUNCTION: extract_vars ###
# @param cur_fp: filepath to spreadsheet containing variable list
# @param cur_sheet: sheet name containing variable list
#
# returns: vector of variables
extract_vars <- function(cur_fp, cur_sheet){
  cur_df <- readxl::read_excel(cur_fp, sheet = cur_sheet)
  return(cur_df$vars)
}

fp_desc <- '../02_config/config_descriptives.xlsx' #file path for descriptive variables
fp_regr <- '../02_config/config_regression.xlsx' #file path for regression variables

#load variables into vectors
defendant_chars_descriptive <- extract_vars(fp_desc, 'defendant_chars')
judge_chars_descriptive <- extract_vars(fp_desc,'judge_chars')
outcomes_descriptive <- extract_vars(fp_desc, 'outcomes')

defendant_chars_regression <- extract_vars(fp_regr, 'defendant_chars')
judge_chars_regression <- extract_vars(fp_regr,'judge_chars')
outcomes_regression <- extract_vars(fp_regr, 'outcomes')


circs_regression <- openxlsx::read.xlsx('../02_config/aggravating_mitigating_circs.xlsx')$col
sections_regression <- openxlsx::read.xlsx('../02_config/sections.xlsx')$col
sections_descriptive <- sections_regression

dir.create('../03_results/exploratory', showWarnings = F)

Basic Descriptives

### FUNCTION: descriptive_analysis ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param variables_to_check: vector of (string) variables to pivot over
#
# @returns: spreadsheet of pivot results
#
# @process: pivot raw_df by each variable in variables_to_check, calculate means for each
# variable in outcomes_descriptive, and add variable counts.
# Display means broken down by outcomes and each variable in variables_to_check.
descriptive_analysis <- function(raw_df, fp, column = '', print, variables_to_check){
  #set up master df
  desc_master_df <- data.frame()
  
  #loop over variables_to_check
  for(variable in variables_to_check){
    if(print) print(variable)
    
    #pivot raw_df by variable and calculate means for each variable in outcomes_descriptive
    cur_df <- raw_df %>%
      group_by_at(variable) %>%
      summarise_at(.vars = c(outcomes_descriptive, circs_regression), .funs = mean, na.rm = T)
      
    #calculate counts for each breakdown
    cur_df_count <- raw_df %>%
      group_by_at(variable) %>%
      count()
    
    #join counts and rename variables
    cur_df <- cur_df %>%
      left_join(cur_df_count, by = variable) %>%
      rename(var_value = variable) %>%
      mutate(var_value = as.character(var_value), #var_value contains the variable values of 'variable'
             grouping_var = variable, #variable we pivoted by
             restriction = column) #restriction condition
    
    #bind results to master df
    desc_master_df <- desc_master_df %>%
      bind_rows(cur_df)
  }
  
  #pivot to longer for graphical analysis, only keep outcome means
  desc_master_df_short <- desc_master_df %>%
    dplyr::select(outcomes_descriptive, var_value, grouping_var) %>%
    pivot_longer(cols = all_of(outcomes_descriptive), names_to = 'outcome_char', values_to = 'outcome_value') %>%
    filter(!is.na(grouping_var)) #get rid of NAs
  
  #loop over variables_to_check to plot results separately for each var in variables_to_check
  for(variable in variables_to_check){
    
    #restrict master_df to 'variable'
    cur_df <- desc_master_df_short %>%
      dplyr::filter(grouping_var == variable)
    
    #plot results; each plot is for a single variable in variables_to_check
    #x-axis are var_values, y-axis are outcome means, facets are different outcomes
    p <- ggplot(cur_df, aes(x = var_value, y = outcome_value))+
          geom_bar(stat = 'identity', position = 'dodge', fill = 'blue')+
          facet_wrap(.~outcome_char, scales = 'free_y')+
          labs(title = variable, x = 'characteristic',
               y = 'mean outcome value')+
          theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 5))
    #print if 'print' is set to true
    if(print) print(p)
  }
  
  
  #save master df (in wide format) at fp
  write.csv(desc_master_df, fp)
  
  #return master df (in wide format)
  return(desc_master_df)
}

Regression analysis

### FUNCTION: regression_analysis ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param variables_to_check: vector of (string) variables; independent variables
# @param controls: vector of variables to control for in 'Pers.' and 'Conv.' regressions
# @separate_displays: vector of variables for which to create separate graphs
#
# @returns: spreadsheet of regression results
#
# @process: loop over each variable in variables_to_check and each outcome in outcomes_regressions.
# regress outcome on variable without controls (None), with demographic controls and controls in 'controls' ('Pers.'),
# and the above-mentioned controls plus dummy variables for each conviction section ('Conv.').
# Display results for each variable in variables_to_check by outcome and model type (i.e., the level of controls). Make separate graphs 
# for factor variables with a lot of levels that are contained in separate_displays
regression_analysis <- function(raw_df, fp, column, print, variables_to_check, controls, seperate_displays = c(' ')){
  #set up master df
  reg_master_df <- data.frame()
  
  #loop over variables_to_check
  for(variable in variables_to_check){
    if(print) print(variable)
    
    #loop over outcomes_regression
    for(outcome in outcomes_regression){
      #No controls regression#
      #set up formula
      fr <- paste0(outcome, ' ~ ', variable)
      #fit; catch errors
      cur_fit <- try(lm(formula = fr, data = raw_df))
      if(is(cur_fit, 'try-error')){
        print(paste0('Failed: ', fr))
        next
      }
      #convert fit to df and correctly set up variables
      cur_fit_df <- cur_fit %>%
        tidy(conf.int = T, conf.level = 0.95) %>%
        as.data.frame() %>%
        filter(grepl(variable, term)) %>%
        mutate(outcome = outcome,
               sig_level = case_when(p.value < 0.01 ~ '***',
                                     p.value < 0.05 ~ '**',
                                     p.value < 0.1 ~ '*',
                                     .default = 'insig'),
               controls = 'None',
               restriction = column)
      #merge results with master df
      reg_master_df <- reg_master_df %>%
        bind_rows(cur_fit_df)
    }
  }
  #loop over outcomes_regression
  for(outcome in outcomes_regression){
    #Personal controls regression#
    #set up formula
    fr_pers <- paste0(outcome, ' ~ ', paste(c(variables_to_check, controls), collapse = ' + '))
    
    #fit with demographic controls; catch errors
    cur_fit_pers <- try(lm(formula = fr_pers, data = raw_df))
    if(is(cur_fit_pers, 'try-error')){
      print(paste0('Failed: ', fr_pers))
      next
    }
    
    #convert model to df
    cur_fit_pers_df <- cur_fit_pers %>%
      tidy(conf.int = T, conf.level = 0.95) %>%
      as.data.frame() %>%
      filter(grepl(paste(variables_to_check, collapse = '|'), term)) %>%
      mutate(outcome = outcome,
             sig_level = case_when(p.value < 0.01 ~ '***',
                                   p.value < 0.05 ~ '**',
                                   p.value < 0.1 ~ '*',
                                   .default = 'insig'),
             controls = 'Pers.',
             restriction = column)
    
    #merge results with master df
    reg_master_df <- reg_master_df %>%
      bind_rows(cur_fit_pers_df)
    
    #set up formula for model including conviction controls
    fr_conv <- paste0(outcome, ' ~ .')
    
    #keep only outcome variables and variables used as controls
    raw_df_restricted <- raw_df %>%
      dplyr::select(all_of(c(outcome, variables_to_check, controls, sections_regression, circs_regression)))
    
    #fit with all controls; catch errors
    cur_fit_conv <- lm(formula = fr_conv, data = raw_df_restricted)
    if(is(cur_fit_conv, 'try-error')){
      print(paste0('Failed: ', fr_conv))
      next
    }
    
    #convert model to df
    cur_fit_conv_df <- cur_fit_conv %>%
      tidy(conf.int = T, conf.level = 0.95) %>%
      as.data.frame() %>%
      filter(grepl(paste(variables_to_check, collapse = '|'), term)) %>%
      mutate(outcome = outcome,
             sig_level = case_when(p.value < 0.01 ~ '***',
                                   p.value < 0.05 ~ '**',
                                   p.value < 0.1 ~ '*',
                                   .default = 'insig'),
             controls = 'Conv.',
             restriction = column)
    
    #bind to master df
    reg_master_df <- reg_master_df %>%
      bind_rows(cur_fit_conv_df)
  }
  
  #set up order of levels for controls variable
  reg_master_df$controls <- factor(reg_master_df$controls, levels = c('None', 'Pers.', 'Conv.'))
  
  #generate graphs if print is set to true
  if(print){
    
    #loop over outcomes_regression
    for(cur_outcome in outcomes_regression){
      #restrict current dataframe to the current outcome variable and remove independent variables
      #contained in 'separate_displays' vector
      cur_df <- reg_master_df %>%
        filter(outcome == cur_outcome, !grepl(paste(seperate_displays, collapse = '|'), term))
      
      #generate plot: x-axis is types of controls used, y-axis is effect size, color is significance level
      #facet is the independent variable; allow y-axis scale to vary
      p <- ggplot(cur_df, aes(x = controls, y = estimate, color = sig_level))+
        geom_point()+
        geom_errorbar(aes(ymin = conf.low, ymax = conf.high))+
        facet_wrap(.~term, scales = 'free_y')+
        labs(x = 'Controls',
             y = 'Effect Size',
             title = cur_outcome)
       print(p)
       
       #generate separate graphs for variables in 'seperate_displays'
       for(sep_var in seperate_displays){
         cur_df <- reg_master_df %>%
          filter(outcome == cur_outcome, grepl(sep_var, term)) %>%
          mutate(term_short = stringr::str_replace(term, sep_var, ''))
        if(nrow(cur_df) == 0) next
        p <- ggplot(cur_df, aes(x = controls, y = estimate, color = sig_level))+
          geom_point()+
          geom_errorbar(aes(ymin = conf.low, ymax = conf.high))+
          facet_wrap(.~term_short)+
          labs(x = 'Controls',
               y = 'Effect Size',
               title = cur_outcome)
         print(p)
       }
    }
  }
  
  
  #save master df at fp
  write.csv(reg_master_df, fp)
  #return master df
  return(reg_master_df)
}

RQ 1: Do defendants who otherwise look the same receive harsher punishments (longer sentences, higher fines) according to their demographic characteristics?

#run descriptive analysis on defendant characteristics
df <- descriptive_analysis(df_short, '../03_results/exploratory/rq1_defendant_outcomes.csv', 'all', TRUE, defendant_chars_descriptive)
## [1] "defendant_age_groups"
## [1] "defendant_is_woman"
## [1] "defendant_some_foreign_background"
## [1] "defendant_is_married"
## [1] "defendant_net_income_quantile"
## [1] "case_is_not_oslo"

#run regression analysis on defendant characteristics
df <- regression_analysis(df_short, '../03_results/exploratory/rq1_regression_results.csv', 'all', TRUE, defendant_chars_regression, c())
## [1] "defendant_age"
## [1] "defendant_is_woman"
## [1] "defendant_some_foreign_background"
## [1] "defendant_log_net_income"
## [1] "case_is_not_oslo"
## [1] "defendant_is_married"

RQ 2: Which crimes have the largest disparities when it comes to equal treatment?

dir.create('../03_results/exploratory/rq2', showWarnings = F)
dir.create('../03_results/exploratory/rq2/defendant', showWarnings = F)

#set up master df
rq2_master_df_regr <- data.frame()

#loop over conviction variables
for (column in sections_regression) {
  #restrict dataset to individuals who have been convicted of a certain crime
  crime_df <- df_short %>% filter(.data[[column]] == 1)

  # Check the number of rows in crime_df
  if (nrow(crime_df) <= 100) {
    next  # Skip the current iteration if crime_df has 100 or more rows
  }
  
  if (column == "convicted_stay_of_execution_(conditional_imprisonment_)") 
    {
      next
    }
  
  print(column)
  
  #set up filenames to save results
  filename_regression <- paste0('../03_results/exploratory/rq2/defendant/', column, '_regression.csv')
  filename_descriptive <- paste0('../03_results/exploratory/rq2/defendant/', column, '_descriptive.csv')
  
  #run regression analysis for dataset restricted by crime
  cur_df <- regression_analysis(crime_df, filename_regression, column, FALSE, defendant_chars_regression, c())
  #run descriptive analysis for dataset restricted by crime
  df <- descriptive_analysis(crime_df, filename_descriptive, column, FALSE,defendant_chars_descriptive)
  
  #bind regression results to master df
  rq2_master_df_regr <- rq2_master_df_regr %>%
    bind_rows(cur_df)
}
## [1] "convicted_stay_of_execution_.conditional_imprisonment_."
## [1] "convicted_bodily_injury"
## [1] "convicted_drug_offense"
## [1] "convicted_illegal_arming_in_a_public_place"
## [1] "convicted_participation"
## [1] "convicted_general_violation_of_traffic_act_1"
## [1] "convicted_drinking_within_six_hours_of_traffic_investigation"
## [1] "convicted_acquisition_possession_and_use_of_drugs"
## [1] "convicted_section_24_of_the_medicines_act_other"
## [1] "convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime."
## [1] "convicted_road_traffic_act_other_1"
## [1] "convicted_violating_traffic_signage_act"
## [1] "convicted_temporary_suspension_of_driving_lisence"
## [1] "convicted_assault_and_battery"
## [1] "convicted_reckless_behavior"
## [1] "convicted_violation_of_residence_and_contact_prohibition_or_decision_on_restraint"
## [1] "convicted_damage"
## [1] "convicted_threats"
## [1] "convicted_serious_drug_offense"
## [1] "convicted_theft"
## [1] "convicted_road_traffic_act_other_5"
## [1] "convicted_violating_basic_traffic_rules"
## [1] "convicted_loss_of_the_right_to_drive_a_motor_vehicle."
## [1] "convicted_obstruction_of_a_public_official"
## [1] "convicted_fraud"
## [1] "convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official"
## [1] "convicted_violation_of_police_act"
## [1] "convicted_disorderly_conduct"
## [1] "convicted_incorrect_explanation"
#restrict regression results to results that are significant at p < 0.05 and models with full controls
rq2_sig_fc <- rq2_master_df_regr %>%
  filter(controls == 'Conv.', p.value < 0.05)

#save significant results
write.csv(rq2_sig_fc, '../03_results/exploratory/rq2_significant_results.csv')

#find crimes for which punishments are most likely to correlate with defendants' demographics
top_crimes <- rq2_sig_fc %>%
  group_by(restriction) %>%
  count() %>%
  arrange(desc(n))
print(head(top_crimes, n = 10))
## # A tibble: 10 × 2
## # Groups:   restriction [10]
##    restriction                                           n
##    <chr>                                             <int>
##  1 convicted_general_violation_of_traffic_act_1          5
##  2 convicted_drug_offense                                4
##  3 convicted_reckless_behavior                           4
##  4 convicted_violating_traffic_signage_act               4
##  5 convicted_acquisition_possession_and_use_of_drugs     3
##  6 convicted_damage                                      3
##  7 convicted_road_traffic_act_other_1                    3
##  8 convicted_section_24_of_the_medicines_act_other       3
##  9 convicted_temporary_suspension_of_driving_lisence     3
## 10 convicted_assault_and_battery                         2
#find defendant characteristics that affect the severity of judgements for the greatest number of crimes
#(counting each outcome separately)
top_char_all_outcomes <- rq2_sig_fc %>%
  group_by(term) %>%
  count() %>%
  arrange(desc(n))
print(head(top_char_all_outcomes))
## # A tibble: 6 × 2
## # Groups:   term [6]
##   term                                  n
##   <chr>                             <int>
## 1 defendant_is_married                 15
## 2 defendant_log_net_income             14
## 3 defendant_age                        13
## 4 defendant_is_woman                    8
## 5 case_is_not_oslo                      5
## 6 defendant_some_foreign_background     3
#find defendant characteristics that affect the severity of judgements for the greatest number of crimes
#(not counting each outcome separately)
top_char_sections <- rq2_sig_fc %>%
  group_by(term, restriction) %>%
  count() %>%
  group_by(term) %>%
  count() %>%
  arrange(desc(n))
print(head(top_char_sections, n = 10))
## # A tibble: 6 × 2
## # Groups:   term [6]
##   term                                  n
##   <chr>                             <int>
## 1 defendant_is_married                 13
## 2 defendant_log_net_income             12
## 3 defendant_age                        11
## 4 defendant_is_woman                    8
## 5 case_is_not_oslo                      4
## 6 defendant_some_foreign_background     3
#types of outcomes that are associated with defendants' demographics the most frequently
top_outcomes <- rq2_sig_fc %>%
  group_by(outcome) %>%
  count() %>%
  arrange(desc(n))
print(top_outcomes)
## # A tibble: 3 × 2
## # Groups:   outcome [3]
##   outcome                        n
##   <chr>                      <int>
## 1 punishment_fine               27
## 2 punishment_prison_days        17
## 3 judgement_convicted_binary    14
#find which defendant demographic is most frequently related to which outcome
defendant_outcome_freq <- as.data.frame(table(rq2_sig_fc$term, rq2_sig_fc$outcome)) %>% 
  arrange(desc(Freq))
print(defendant_outcome_freq)
##                                 Var1                       Var2 Freq
## 1               defendant_is_married            punishment_fine    8
## 2           defendant_log_net_income            punishment_fine    8
## 3                      defendant_age     punishment_prison_days    8
## 4                 defendant_is_woman judgement_convicted_binary    5
## 5                      defendant_age            punishment_fine    5
## 6               defendant_is_married judgement_convicted_binary    4
## 7           defendant_log_net_income     punishment_prison_days    4
## 8                   case_is_not_oslo            punishment_fine    3
## 9               defendant_is_married     punishment_prison_days    3
## 10          defendant_log_net_income judgement_convicted_binary    2
## 11 defendant_some_foreign_background judgement_convicted_binary    2
## 12                defendant_is_woman            punishment_fine    2
## 13                  case_is_not_oslo judgement_convicted_binary    1
## 14 defendant_some_foreign_background            punishment_fine    1
## 15                  case_is_not_oslo     punishment_prison_days    1
## 16                defendant_is_woman     punishment_prison_days    1
## 17                     defendant_age judgement_convicted_binary    0
## 18 defendant_some_foreign_background     punishment_prison_days    0
#set up variable that is one if the effect is positive (characteristic leads to higher number in the outcome category)
rq2_sig_fc <- rq2_sig_fc %>%
  dplyr::mutate(positive_sign = ifelse(estimate > 0, 1, 0))

#find which defendant characteristic leads to increases/decreases in the outcome category most frequently
defendant_outcome_freq_pos <- as.data.frame(table(rq2_sig_fc$term, rq2_sig_fc$outcome, rq2_sig_fc$positive_sign)) %>% 
  arrange(desc(Freq)) %>%
  rename(defendant_char = Var1,
         outcome = Var2,
         positive_sign = Var3)

#find which defendant characteristic is associated with increases in the outcome category the greatest number of times
defendant_outcome_freq_pos %>%
  filter(positive_sign == 1) %>%
  head(n = 10) %>%
  print()
##                       defendant_char                    outcome positive_sign
## 1           defendant_log_net_income            punishment_fine             1
## 2                      defendant_age     punishment_prison_days             1
## 3                      defendant_age            punishment_fine             1
## 4               defendant_is_married            punishment_fine             1
## 5                 defendant_is_woman judgement_convicted_binary             1
## 6               defendant_is_married judgement_convicted_binary             1
## 7  defendant_some_foreign_background judgement_convicted_binary             1
## 8               defendant_is_married     punishment_prison_days             1
## 9                   case_is_not_oslo judgement_convicted_binary             1
## 10 defendant_some_foreign_background            punishment_fine             1
##    Freq
## 1     8
## 2     8
## 3     5
## 4     4
## 5     3
## 6     2
## 7     2
## 8     2
## 9     1
## 10    1
#find which defendant characteristic is associated with decreases in the outcome category the greatest number of times
defendant_outcome_freq_pos %>%
  filter(positive_sign == 0) %>%
  head(n = 10) %>%
  print()
##              defendant_char                    outcome positive_sign Freq
## 1      defendant_is_married            punishment_fine             0    4
## 2  defendant_log_net_income     punishment_prison_days             0    4
## 3          case_is_not_oslo            punishment_fine             0    3
## 4      defendant_is_married judgement_convicted_binary             0    2
## 5        defendant_is_woman judgement_convicted_binary             0    2
## 6  defendant_log_net_income judgement_convicted_binary             0    2
## 7        defendant_is_woman            punishment_fine             0    2
## 8          case_is_not_oslo     punishment_prison_days             0    1
## 9      defendant_is_married     punishment_prison_days             0    1
## 10       defendant_is_woman     punishment_prison_days             0    1
#For each defendant characteristic and outcome category, find crimes for which there is a significant effect
#print the crimes where the effect is the biggest
for(cur_defendant in defendant_chars_regression){
  for(cur_outcome in outcomes_regression){
    cur_df <- rq2_sig_fc %>%
      dplyr::select(term, outcome, restriction, estimate, p.value) %>%
      dplyr::rename(characteristic = term,
                    crime = restriction) %>%
      filter(characteristic == cur_defendant, outcome == cur_outcome) %>%
      slice_max(abs(estimate), n = 10) %>%
      print()
  }
}
## [1] characteristic outcome        crime          estimate       p.value       
## <0 rows> (or 0-length row.names)
##   characteristic                outcome
## 1  defendant_age punishment_prison_days
## 2  defendant_age punishment_prison_days
## 3  defendant_age punishment_prison_days
## 4  defendant_age punishment_prison_days
## 5  defendant_age punishment_prison_days
## 6  defendant_age punishment_prison_days
## 7  defendant_age punishment_prison_days
## 8  defendant_age punishment_prison_days
##                                                                                                  crime
## 1                                                                               convicted_drug_offense
## 2                                                                   convicted_road_traffic_act_other_5
## 3                                                           convicted_illegal_arming_in_a_public_place
## 4 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
## 5                                                    convicted_temporary_suspension_of_driving_lisence
## 6                                                              convicted_violating_traffic_signage_act
## 7                                                         convicted_general_violation_of_traffic_act_1
## 8                                         convicted_drinking_within_six_hours_of_traffic_investigation
##    estimate      p.value
## 1 3.4297419 0.0182207268
## 2 3.3465849 0.0480281313
## 3 3.2203970 0.0005294762
## 4 1.3118823 0.0261630056
## 5 1.2006079 0.0002596586
## 6 0.7925754 0.0137070858
## 7 0.5082964 0.0006492344
## 8 0.4913532 0.0010669092
##   characteristic         outcome                                        crime
## 1  defendant_age punishment_fine           convicted_road_traffic_act_other_1
## 2  defendant_age punishment_fine      convicted_violating_traffic_signage_act
## 3  defendant_age punishment_fine                  convicted_reckless_behavior
## 4  defendant_age punishment_fine convicted_general_violation_of_traffic_act_1
## 5  defendant_age punishment_fine                convicted_assault_and_battery
##    estimate     p.value
## 1 213.72790 0.004532597
## 2 201.03767 0.007543465
## 3 137.82396 0.044210147
## 4 109.57488 0.011120759
## 5  56.19813 0.012575137
##       characteristic                    outcome
## 1 defendant_is_woman judgement_convicted_binary
## 2 defendant_is_woman judgement_convicted_binary
## 3 defendant_is_woman judgement_convicted_binary
## 4 defendant_is_woman judgement_convicted_binary
## 5 defendant_is_woman judgement_convicted_binary
##                                                                                      crime
## 1                                                        convicted_violation_of_police_act
## 2                                                            convicted_assault_and_battery
## 3 convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official
## 4                                                              convicted_reckless_behavior
## 5                                                                   convicted_drug_offense
##        estimate     p.value
## 1 -2.792629e-15 0.001155201
## 2 -2.538348e-15 0.002411851
## 3  1.066977e-15 0.016410674
## 4  2.965502e-16 0.048688345
## 5  8.357818e-17 0.011273566
##       characteristic                outcome                           crime
## 1 defendant_is_woman punishment_prison_days convicted_incorrect_explanation
##    estimate    p.value
## 1 -13.49623 0.04608169
##       characteristic         outcome
## 1 defendant_is_woman punishment_fine
## 2 defendant_is_woman punishment_fine
##                                          crime  estimate    p.value
## 1 convicted_general_violation_of_traffic_act_1 -3379.833 0.03489335
## 2                             convicted_damage -3224.115 0.02903713
##                      characteristic                    outcome
## 1 defendant_some_foreign_background judgement_convicted_binary
## 2 defendant_some_foreign_background judgement_convicted_binary
##                                     crime     estimate      p.value
## 1                        convicted_damage 1.436724e-15 0.0008344268
## 2 convicted_violating_basic_traffic_rules 2.424160e-16 0.0156587539
## [1] characteristic outcome        crime          estimate       p.value       
## <0 rows> (or 0-length row.names)
##                      characteristic         outcome
## 1 defendant_some_foreign_background punishment_fine
##                                               crime estimate    p.value
## 1 convicted_temporary_suspension_of_driving_lisence 2941.334 0.04626381
##             characteristic                    outcome
## 1 defendant_log_net_income judgement_convicted_binary
## 2 defendant_log_net_income judgement_convicted_binary
##                                                                                      crime
## 1 convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official
## 2                                                              convicted_reckless_behavior
##        estimate    p.value
## 1 -7.957705e-17 0.03988181
## 2 -2.835157e-17 0.04331161
##             characteristic                outcome
## 1 defendant_log_net_income punishment_prison_days
## 2 defendant_log_net_income punishment_prison_days
## 3 defendant_log_net_income punishment_prison_days
## 4 defendant_log_net_income punishment_prison_days
##                                               crime   estimate      p.value
## 1                           convicted_participation -16.632751 5.577127e-03
## 2   convicted_section_24_of_the_medicines_act_other  -5.236602 2.944234e-05
## 3 convicted_acquisition_possession_and_use_of_drugs  -5.007349 1.540773e-04
## 4                                 convicted_threats  -3.541713 6.663680e-03
##             characteristic         outcome
## 1 defendant_log_net_income punishment_fine
## 2 defendant_log_net_income punishment_fine
## 3 defendant_log_net_income punishment_fine
## 4 defendant_log_net_income punishment_fine
## 5 defendant_log_net_income punishment_fine
## 6 defendant_log_net_income punishment_fine
## 7 defendant_log_net_income punishment_fine
## 8 defendant_log_net_income punishment_fine
##                                                                                                  crime
## 1                                              convicted_stay_of_execution_.conditional_imprisonment_.
## 2                                         convicted_drinking_within_six_hours_of_traffic_investigation
## 3                                                convicted_loss_of_the_right_to_drive_a_motor_vehicle.
## 4                                                         convicted_general_violation_of_traffic_act_1
## 5                                                    convicted_temporary_suspension_of_driving_lisence
## 6                                                      convicted_section_24_of_the_medicines_act_other
## 7                                                    convicted_acquisition_possession_and_use_of_drugs
## 8 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
##    estimate      p.value
## 1 1086.1980 3.732625e-02
## 2 1065.0868 3.158167e-09
## 3 1024.1496 2.564026e-02
## 4  879.4619 1.877034e-10
## 5  641.7874 2.804050e-06
## 6  546.8547 4.380510e-03
## 7  546.6866 3.938085e-03
## 8  373.6703 5.221588e-05
##     characteristic                    outcome           crime     estimate
## 1 case_is_not_oslo judgement_convicted_binary convicted_fraud 1.677324e-15
##      p.value
## 1 0.03762771
##     characteristic                outcome           crime  estimate    p.value
## 1 case_is_not_oslo punishment_prison_days convicted_fraud -34.51369 0.01005474
##     characteristic         outcome                                   crime
## 1 case_is_not_oslo punishment_fine      convicted_road_traffic_act_other_1
## 2 case_is_not_oslo punishment_fine convicted_violating_traffic_signage_act
## 3 case_is_not_oslo punishment_fine                         convicted_theft
##    estimate    p.value
## 1 -7476.818 0.01258817
## 2 -6274.398 0.03632426
## 3 -1941.121 0.02022391
##         characteristic                    outcome
## 1 defendant_is_married judgement_convicted_binary
## 2 defendant_is_married judgement_convicted_binary
## 3 defendant_is_married judgement_convicted_binary
## 4 defendant_is_married judgement_convicted_binary
##                                     crime      estimate    p.value
## 1          convicted_serious_drug_offense -4.575035e-15 0.02205752
## 2      convicted_road_traffic_act_other_5 -3.496628e-15 0.02876892
## 3 convicted_violating_traffic_signage_act  2.420500e-15 0.01881458
## 4      convicted_road_traffic_act_other_1  1.673703e-15 0.02175401
##         characteristic                outcome
## 1 defendant_is_married punishment_prison_days
## 2 defendant_is_married punishment_prison_days
## 3 defendant_is_married punishment_prison_days
##                                          crime   estimate     p.value
## 1               convicted_serious_drug_offense 1222.37492 0.002099873
## 2                       convicted_drug_offense  212.94071 0.005542127
## 3 convicted_general_violation_of_traffic_act_1  -17.81272 0.006536267
##         characteristic         outcome
## 1 defendant_is_married punishment_fine
## 2 defendant_is_married punishment_fine
## 3 defendant_is_married punishment_fine
## 4 defendant_is_married punishment_fine
## 5 defendant_is_married punishment_fine
## 6 defendant_is_married punishment_fine
## 7 defendant_is_married punishment_fine
## 8 defendant_is_married punishment_fine
##                                               crime   estimate      p.value
## 1   convicted_section_24_of_the_medicines_act_other -10709.170 0.0064693484
## 2 convicted_acquisition_possession_and_use_of_drugs  -9811.794 0.0108611354
## 3                       convicted_reckless_behavior  -9602.164 0.0007878867
## 4                 convicted_violation_of_police_act   7806.936 0.0442773840
## 5                                  convicted_damage   5956.355 0.0179288895
## 6                            convicted_drug_offense   4837.917 0.0263842150
## 7                                   convicted_theft   4790.092 0.0382015081
## 8                                 convicted_threats  -2316.773 0.0206096997
# Icebox for now
# 
# prefix <- "chapter_convicted"
# 
# prefix_columns <- grep(paste0("^", prefix), names(df_short), value = TRUE)
# 
# rq2_master_df_regr <- data.frame()
# 
# for (column in prefix_columns) {
#   crime_df <- df_short %>% filter(.data[[column]] == 1)
# 
#   # Check the number of rows in crime_df
#   if (nrow(crime_df) <= 200) {
#     next  # Skip the current iteration if crime_df has 100 or more rows
#   }
#   
#   print(column)
#   
#   filename_regression <- paste0('../03_results/exploratory/rq2/chapter/', column, '_regression.csv')
#   filename_descriptive <- paste0('../03_results/exploratory/rq2/chapter/', column, '_descriptive.csv')
#   
#   cur_df <- regression_analysis(crime_df, filename_regression, column, FALSE,defendant_chars_regression, c())
#   df <- descriptive_analysis(crime_df, filename_descriptive, column, FALSE,defendant_chars_descriptive)
#   rq2_master_df_regr <- rq2_master_df_regr %>%
#     bind_rows(cur_df)
# }

RQ 3: Do the demographics of lay judges correlate to disparate sentencing outcomes when it comes to equal treatment?

#run descriptive analysis for judge characteristics
df <- descriptive_analysis(df_short, '../03_results/exploratory/rq3_judge_outcomes.csv', 'all', TRUE,judge_chars_descriptive)
## [1] "judge_age_groups"
## [1] "judge_is_woman"
## [1] "judge_some_foreign_background"
## [1] "judge_is_married"
## [1] "judge_net_income_quantile"
## [1] "case_is_not_oslo"

#run regression analysis for defendant characteristics
df <- regression_analysis(df_short, '../03_results/exploratory/rq3_judge_regression_results.csv', 'all', TRUE,judge_chars_regression, defendant_chars_regression, c('judge_party', 'case_district_court'))
## [1] "judge_age"
## [1] "judge_is_woman"
## [1] "judge_some_foreign_background"
## [1] "judge_log_net_income"
## [1] "judge_is_married"

RQ 3b: What are the crimes that have the strongest correlation between judge characteristics and outcomes?

dir.create('../03_results/exploratory/rq2/', showWarnings = F)
dir.create('../03_results/exploratory/rq2/judge/', showWarnings = F)

#extract all dummy variables for individual crimes people were convicted of
prefix <- "convicted_"
prefix_columns <- grep(paste0("^", prefix), names(df_short), value = TRUE)

#set up master df
rq3a_master_df_regr <- data.frame()

#loop over crime variables
for (column in prefix_columns) {
  
  #restrict data frame to people convucted of individual crimes
  crime_df <- df_short %>% filter(.data[[column]] == 1)

  # Check the number of rows in crime_df
  if (nrow(crime_df) <= 100) {
    next  # Skip the current iteration if crime_df has 100 or fewer rows
  }
  
  print(column)
  
  #set up filepaths to save results in
  filename_regression <- paste0('../03_results/exploratory/rq2/judge/', column, '_regression.csv')
  filename_descriptive <- paste0('../03_results/exploratory/rq2/judge/', column, '_descriptive.csv')
  
  #run regression analysis for dfs restricted to people convicted of individual crimes
  cur_df <- regression_analysis(crime_df, filename_regression, column, FALSE,judge_chars_regression, defendant_chars_regression)
  
  #bind to master df
  rq3a_master_df_regr <- rq3a_master_df_regr %>%
    bind_rows(cur_df)
}
## [1] "convicted_stay_of_execution_.conditional_imprisonment_."
## [1] "convicted_bodily_injury"
## [1] "convicted_drug_offense"
## [1] "convicted_illegal_arming_in_a_public_place"
## [1] "convicted_participation"
## [1] "convicted_general_violation_of_traffic_act_1"
## [1] "convicted_drinking_within_six_hours_of_traffic_investigation"
## [1] "convicted_acquisition_possession_and_use_of_drugs"
## [1] "convicted_section_24_of_the_medicines_act_other"
## [1] "convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime."
## [1] "convicted_road_traffic_act_other_1"
## [1] "convicted_violating_traffic_signage_act"
## [1] "convicted_temporary_suspension_of_driving_lisence"
## [1] "convicted_assault_and_battery"
## [1] "convicted_reckless_behavior"
## [1] "convicted_violation_of_residence_and_contact_prohibition_or_decision_on_restraint"
## [1] "convicted_damage"
## [1] "convicted_threats"
## [1] "convicted_serious_drug_offense"
## [1] "convicted_theft"
## [1] "convicted_road_traffic_act_other_5"
## [1] "convicted_violating_basic_traffic_rules"
## [1] "convicted_loss_of_the_right_to_drive_a_motor_vehicle."
## [1] "convicted_obstruction_of_a_public_official"
## [1] "convicted_fraud"
## [1] "convicted_violence_threats_vandalism_or_other_unlawful_conduct_against_a_public_official"
## [1] "convicted_violation_of_police_act"
## [1] "convicted_disorderly_conduct"
## [1] "convicted_incorrect_explanation"
#restrict master df to models with full controls and results that were significant at the p < 0.05 level
rq3a_sig_fc <- rq3a_master_df_regr %>%
  filter(controls == 'Conv.', p.value < 0.05)

#save df with significant results
write.csv(rq3a_sig_fc, '../03_results/exploratory/rq3a_significant_results.csv')

#find crimes where outcomes were most commonly associated with judges' demographics
top_crimes <- rq3a_sig_fc %>%
  group_by(restriction) %>%
  count() %>%
  arrange(desc(n))
print(head(top_crimes, n = 10))
## # A tibble: 10 × 2
## # Groups:   restriction [10]
##    restriction                                                                 n
##    <chr>                                                                   <int>
##  1 convicted_participation                                                     4
##  2 convicted_incorrect_explanation                                             3
##  3 convicted_road_traffic_act_other_5                                          3
##  4 convicted_bodily_injury                                                     2
##  5 convicted_disorderly_conduct                                                2
##  6 convicted_fraud                                                             2
##  7 convicted_obstruction_of_a_public_official                                  2
##  8 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_re…     2
##  9 convicted_damage                                                            1
## 10 convicted_general_violation_of_traffic_act_1                                1
#find judges' characteristics that are significantly associated with outcomes for the lparamest number of individual crimes
#multiple outcomes per crime are counted multiple times
top_char_all_outcomes <- rq3a_sig_fc %>%
  group_by(term) %>%
  count() %>%
  arrange(desc(n))
print(head(top_char_all_outcomes))
## # A tibble: 5 × 2
## # Groups:   term [5]
##   term                              n
##   <chr>                         <int>
## 1 judge_age                         8
## 2 judge_some_foreign_background     6
## 3 judge_is_married                  5
## 4 judge_log_net_income              5
## 5 judge_is_woman                    3
#find judges' characteristics that are significantly associated with outcomes for the lparamest number of individual crimes
#multiple outcomes per crime are counted a single time
top_char_sections <- rq3a_sig_fc %>%
  group_by(term, restriction) %>%
  count() %>%
  group_by(term) %>%
  count() %>%
  arrange(desc(n))
print(head(top_char_sections, n = 10))
## # A tibble: 5 × 2
## # Groups:   term [5]
##   term                              n
##   <chr>                         <int>
## 1 judge_age                         7
## 2 judge_is_married                  5
## 3 judge_log_net_income              5
## 4 judge_some_foreign_background     5
## 5 judge_is_woman                    3
#find outcomes that are associated with judges' characteristics for the greatest number of crimes
top_outcomes <- rq3a_sig_fc %>%
  group_by(outcome) %>%
  count() %>%
  arrange(desc(n))
print(top_outcomes)
## # A tibble: 3 × 2
## # Groups:   outcome [3]
##   outcome                        n
##   <chr>                      <int>
## 1 punishment_fine               13
## 2 judgement_convicted_binary     7
## 3 punishment_prison_days         7
#find which judge characteristic is associated with which outcome for the greatest number of crimes
judge_outcome_freq <- as.data.frame(table(rq3a_sig_fc$term, rq3a_sig_fc$outcome)) %>% 
  arrange(desc(Freq))
print(judge_outcome_freq)
##                             Var1                       Var2 Freq
## 1                      judge_age            punishment_fine    5
## 2           judge_log_net_income            punishment_fine    3
## 3  judge_some_foreign_background     punishment_prison_days    3
## 4                 judge_is_woman judgement_convicted_binary    2
## 5           judge_log_net_income judgement_convicted_binary    2
## 6               judge_is_married            punishment_fine    2
## 7  judge_some_foreign_background            punishment_fine    2
## 8                      judge_age     punishment_prison_days    2
## 9               judge_is_married     punishment_prison_days    2
## 10                     judge_age judgement_convicted_binary    1
## 11              judge_is_married judgement_convicted_binary    1
## 12 judge_some_foreign_background judgement_convicted_binary    1
## 13                judge_is_woman            punishment_fine    1
## 14                judge_is_woman     punishment_prison_days    0
## 15          judge_log_net_income     punishment_prison_days    0
#set up variable for whether impact on outcome was positive or negative
rq3a_sig_fc <- rq3a_sig_fc %>%
  dplyr::mutate(positive_sign = ifelse(estimate > 0, 1, 0))

#find which judge characteristic is associated with increases/decreases
#in outcome variables for the greatest number of crimes
judge_outcome_freq_pos <- as.data.frame(table(rq3a_sig_fc$term, rq3a_sig_fc$outcome, rq3a_sig_fc$positive_sign)) %>% 
  arrange(desc(Freq)) %>%
  rename(defendant_char = Var1,
         outcome = Var2,
         positive_sign = Var3)

#find judge charactistics that lead to increases in an outcome variable for the greatest number of crimes
judge_outcome_freq_pos %>%
  filter(positive_sign == 1) %>%
  head(n = 10) %>%
  print()
##                   defendant_char                    outcome positive_sign Freq
## 1                      judge_age            punishment_fine             1    4
## 2                 judge_is_woman judgement_convicted_binary             1    2
## 3  judge_some_foreign_background            punishment_fine             1    2
## 4                      judge_age     punishment_prison_days             1    2
## 5                      judge_age judgement_convicted_binary             1    1
## 6               judge_is_married judgement_convicted_binary             1    1
## 7           judge_log_net_income judgement_convicted_binary             1    1
## 8  judge_some_foreign_background judgement_convicted_binary             1    1
## 9               judge_is_married            punishment_fine             1    0
## 10                judge_is_woman            punishment_fine             1    0
#find judge charactistics that lead to decreases in an outcome variable for the greatest number of crimes
judge_outcome_freq_pos %>%
  filter(positive_sign == 0) %>%
  head(n = 10) %>%
  print()
##                   defendant_char                    outcome positive_sign Freq
## 1           judge_log_net_income            punishment_fine             0    3
## 2  judge_some_foreign_background     punishment_prison_days             0    3
## 3               judge_is_married            punishment_fine             0    2
## 4               judge_is_married     punishment_prison_days             0    2
## 5           judge_log_net_income judgement_convicted_binary             0    1
## 6                      judge_age            punishment_fine             0    1
## 7                 judge_is_woman            punishment_fine             0    1
## 8                      judge_age judgement_convicted_binary             0    0
## 9               judge_is_married judgement_convicted_binary             0    0
## 10                judge_is_woman judgement_convicted_binary             0    0
#find crimes where judges' characteristics have the biggest impact on sentencing outcomes
for(cur_judge in judge_chars_regression){
  for(cur_outcome in outcomes_regression){
    cur_df <- rq3a_sig_fc %>%
      dplyr::select(term, outcome, restriction, estimate, p.value) %>%
      dplyr::rename(characteristic = term,
                    crime = restriction) %>%
      filter(characteristic == cur_judge, outcome == cur_outcome) %>%
      slice_max(abs(estimate), n = 10) %>%
      print()
  }
}
##   characteristic                    outcome           crime     estimate
## 1      judge_age judgement_convicted_binary convicted_fraud 8.587035e-17
##       p.value
## 1 0.001905894
##   characteristic                outcome
## 1      judge_age punishment_prison_days
## 2      judge_age punishment_prison_days
##                                                                                                  crime
## 1 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
## 2                                                    convicted_temporary_suspension_of_driving_lisence
##   estimate    p.value
## 1 1.325579 0.03927478
## 2 0.679432 0.04823379
##   characteristic         outcome                                      crime
## 1      judge_age punishment_fine         convicted_road_traffic_act_other_5
## 2      judge_age punishment_fine                          convicted_threats
## 3      judge_age punishment_fine convicted_obstruction_of_a_public_official
## 4      judge_age punishment_fine            convicted_incorrect_explanation
## 5      judge_age punishment_fine                            convicted_fraud
##     estimate      p.value
## 1 -867.03683 2.177773e-02
## 2  113.35012 2.022627e-04
## 3  110.19710 4.713502e-02
## 4   62.62385 5.651788e-08
## 5   48.50448 2.471014e-02
##   characteristic                    outcome                              crime
## 1 judge_is_woman judgement_convicted_binary convicted_road_traffic_act_other_5
## 2 judge_is_woman judgement_convicted_binary            convicted_participation
##       estimate     p.value
## 1 2.102379e-15 0.001281134
## 2 1.249239e-15 0.042536879
## [1] characteristic outcome        crime          estimate       p.value       
## <0 rows> (or 0-length row.names)
##   characteristic         outcome            crime  estimate    p.value
## 1 judge_is_woman punishment_fine convicted_damage -2321.286 0.03283881
##                  characteristic                    outcome
## 1 judge_some_foreign_background judgement_convicted_binary
##                          crime     estimate      p.value
## 1 convicted_disorderly_conduct 5.951542e-15 0.0003184837
##                  characteristic                outcome
## 1 judge_some_foreign_background punishment_prison_days
## 2 judge_some_foreign_background punishment_prison_days
## 3 judge_some_foreign_background punishment_prison_days
##                                     crime   estimate    p.value
## 1                 convicted_bodily_injury -132.02808 0.02439854
## 2 convicted_violating_basic_traffic_rules  -62.03556 0.03464084
## 3            convicted_disorderly_conduct  -47.00880 0.03199822
##                  characteristic         outcome
## 1 judge_some_foreign_background punishment_fine
## 2 judge_some_foreign_background punishment_fine
##                                          crime estimate    p.value
## 1                      convicted_participation 7293.264 0.01807152
## 2 convicted_general_violation_of_traffic_act_1 6843.366 0.02590505
##         characteristic                    outcome                   crime
## 1 judge_log_net_income judgement_convicted_binary convicted_participation
## 2 judge_log_net_income judgement_convicted_binary convicted_bodily_injury
##        estimate      p.value
## 1  2.806408e-15 4.808496e-05
## 2 -1.748549e-15 2.631840e-04
## [1] characteristic outcome        crime          estimate       p.value       
## <0 rows> (or 0-length row.names)
##         characteristic         outcome
## 1 judge_log_net_income punishment_fine
## 2 judge_log_net_income punishment_fine
## 3 judge_log_net_income punishment_fine
##                                        crime   estimate     p.value
## 1 convicted_obstruction_of_a_public_official -2658.4411 0.023575930
## 2                            convicted_theft -1441.7071 0.038169309
## 3            convicted_incorrect_explanation  -840.6299 0.001284872
##     characteristic                    outcome
## 1 judge_is_married judgement_convicted_binary
##                                                                                                  crime
## 1 convicted_sentencing_beyond_the_maximum_sentence_.multiple_offences_repeat_offences_organized_crime.
##       estimate    p.value
## 1 2.728566e-15 0.04525535
##     characteristic                outcome                              crime
## 1 judge_is_married punishment_prison_days            convicted_participation
## 2 judge_is_married punishment_prison_days convicted_road_traffic_act_other_5
##     estimate     p.value
## 1 -199.83461 0.002401027
## 2  -71.04465 0.045099605
##     characteristic         outcome
## 1 judge_is_married punishment_fine
## 2 judge_is_married punishment_fine
##                                                                               crime
## 1 convicted_violation_of_residence_and_contact_prohibition_or_decision_on_restraint
## 2                                                   convicted_incorrect_explanation
##     estimate     p.value
## 1 -3077.3602 0.005058206
## 2  -557.7092 0.018938851
### FUNCTION: descriptive_analysis_2d ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param vars_d1: vector of (string) variables to pivot over
# @param vars_d2: vector of (string) variables to pivot over
#
# @returns: spreadsheet of pivot results
#
# @process: pivot raw_df by each combination of variables in in vars_d1 and vars_d2, calculate means for each
# variable in outcomes_descriptive, and add variable counts.
# Display means broken down by outcomes and each variable combination
descriptive_analysis_2d <- function(raw_df, fp, column, print, vars_d1, vars_d2){
  desc_master_df <- data.frame()
  #loop over variables in vars_d1
  for(v1 in vars_d1){
    
    #loop over variables in vars_d2
    for(v2 in vars_d2){
      if(v1 == v2) next
      if(print) print(paste0(v1, ': ', v2))
      
      
      #group raw_df by current variables v1 and v2 and calculate mean values for each outcome variable
      cur_df <- raw_df %>%
        group_by_at(vars(v1, v2)) %>%
        summarise_at(.vars = outcomes_descriptive, .funs = mean, na.rm = T)
        
      
      #calculate count (number of observations) for the variable combinations
      cur_df_count <- raw_df %>%
        group_by_at(vars(v1, v2)) %>%
        count()
      
      #join count and outcome mean table and rename variables
      cur_df <- cur_df %>%
      left_join(cur_df_count, by = c(v1, v2)) %>%
      rename(v1_value = v1,
             v2_value = v2) %>%
      mutate(v1_value = as.character(v1_value),
             v2_value = as.character(v2_value),
             v1 = v1,
             v2 = v2,
             restriction = column)
      
      #bind summary table to master df
      desc_master_df <- desc_master_df %>%
        bind_rows(cur_df)
    }
  }
  #if print is set to true
  if(print){
    #pivot master df to longer for graphical display
    desc_master_df_short <- desc_master_df %>%
      pivot_longer(cols = all_of(outcomes_descriptive), names_to = 'outcome_char', values_to = 'outcome_value') %>%
      filter(!is.na(v1_value), !is.na(v2_value))
    #loop over variables in vars_d1
    for(cur_v1 in vars_d1){
      
      #loop over variables in vars_d2
      for(cur_v2 in vars_d2){
        
        #loop over outcome variables in outcomes_descriptive
        for(outcome_var in outcomes_descriptive){
          #restrict master df by current v1, v2, and outcome variable
          cur_df <- desc_master_df_short %>%
            dplyr::filter(v1 == cur_v1, v2 == cur_v2, outcome_char == outcome_var)
          
          if(length(unique(cur_df$v2_value)) < 1) next
          
          #plot descriptive results: x-axis are the values that can be taken on by v1, y-axis is the outcome mean
          # facets are the values that v2 can take on
          p <- ggplot(cur_df, aes(x = v1_value, y = outcome_value))+
                geom_bar(stat = 'identity', position = 'dodge')+
                facet_wrap(.~v2_value)+
                labs(title = paste0(outcome_var, ': ', cur_v1, ' x ', cur_v2),
                     subtitle = paste0(outcome_var, ' by ', cur_v1, ' (x-axis) \nand ', cur_v2, ' (grid)'),
                     x = cur_v1,
                     y = outcome_var)+
                theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=0))
          print(p)
        }
      }
    }
  }
  #save master df
  write.csv(desc_master_df, fp)
  #return master df
  return(desc_master_df)
}
### FUNCTION: regression_analysis_2d ###
# @param raw_df: df that contains all variables contained in other paramuments
# @param fp: filepath to save results spreadsheet (needs to end in '.csv')
# @param column: If raw_df was restricted in some way, this can be indicated here
# @param print: boolean to indicate if graphs should be generated
# @param vars_d1: vector of (string) variables
# @param vars_d2: vector of (string) variables
#
# @returns: spreadsheet of regression results
#
# @process: loop over each variable combination in vars_d1 and vars_d2 and each outcome in outcomes_regressions.
# Regress outcome on interaction of variable combination without controls (None), controlling for all variables in vars_d1 and vars_d2,
# and the above-mentioned controls plus dummy variables for each conviction section ('Conv.').
# Display results for each variable combination by outcome and model type (i.e., the level of controls).
regression_analysis_2d <- function(raw_df, fp, column, print, vars_d1, vars_d2){
  #set up master df
  reg_master_df <- data.frame()
  
  #loop over variables in vars_d1
  for(v1 in vars_d1){
    
    #loop over variables in vars_d2
    for(v2 in vars_d2){
      if(v1 == v2) next
      if(print) print(paste0(v1, ': ', v2))
      
      #loop over variables in outcomes_regression
      for(outcome in outcomes_regression){
        
        #set up formula for interaction effect between v1 and v2 without any controls
        fr <- paste0(outcome, ' ~ ', v1, '*', v2)
        
        #fit; catch errors
        cur_fit <- try(lm(formula = fr, data = raw_df))
        if(is(cur_fit, 'try-error')){
          print(paste0('Failed: ', fr))
          next
        }
        
        #convert model to df and format variables
        cur_fit_df <- cur_fit %>%
          tidy(conf.int = T, conf.level = 0.95) %>%
          as.data.frame() %>%
          filter(grepl(v1, term), grepl(v2, term)) %>%
          mutate(outcome = outcome,
                 sig_level = case_when(p.value < 0.01 ~ '***',
                                       p.value < 0.05 ~ '**',
                                       p.value < 0.1 ~ '*',
                                       .default = 'insig'),
                 controls = 'None',
                 restriction = column) %>%
          separate(col = term, into = c('judge_char', 'defendant_char'), sep = ':', remove = F)
        
        #save regression results to master df
        reg_master_df <- reg_master_df %>%
          bind_rows(cur_fit_df)
        
        #set up control vector by combining vars_d1 and vars_d2 and removing variables currently used for interaction effect
        controls <- unique(c(vars_d1, vars_d2))
        controls <- controls[!controls %in% c(v1, v2)]
        
        #set up formula with controls
        fr_pers <- paste(c(fr, controls), collapse = ' + ')
        
        #run model; catch errors
        cur_fit_pers <- try(lm(formula = fr_pers, data = raw_df))
        if(is(cur_fit_pers, 'try-error')){
          print(paste0('Failed: ', fr))
          next
        }
        
        #convert to df and format variables
        cur_fit_pers_df <- cur_fit_pers %>%
          tidy(conf.int = T, conf.level = 0.95) %>%
          as.data.frame() %>%
          filter(grepl(v1, term), grepl(v2, term)) %>%
          mutate(outcome = outcome,
                 sig_level = case_when(p.value < 0.01 ~ '***',
                                       p.value < 0.05 ~ '**',
                                       p.value < 0.1 ~ '*',
                                       .default = 'insig'),
                 controls = 'Pers.',
                 restriction = column)%>%
          separate(col = term, into = c('judge_char', 'defendant_char'), sep = ':', remove = F)
        
        #save results in master df
        reg_master_df <- reg_master_df %>%
          bind_rows(cur_fit_pers_df)
        
        #set up df and only keep outcome, control and conviction variables
        df_restricted <- raw_df %>%
          dplyr::select(all_of(c(outcome, v1, v2, controls, sections_regression, circs_regression)))
        
        #set up formula for Conv. model
        fr_conv <- paste0(fr, ' + .')
        
        #run model; catch errors
        cur_fit_conv <- try(lm(formula = fr_conv, data = df_restricted))
        if(is(cur_fit_conv, 'try-error')){
          print(paste0('Failed: ', fr))
          next
        }
        
        #convert model to df and format variables
        cur_fit_conv_df <- cur_fit_conv %>%
          tidy(conf.int = T, conf.level = 0.95) %>%
          as.data.frame() %>%
          filter(grepl(v1, term), grepl(v2, term)) %>%
          mutate(outcome = outcome,
                 sig_level = case_when(p.value < 0.01 ~ '***',
                                       p.value < 0.05 ~ '**',
                                       p.value < 0.1 ~ '*',
                                       .default = 'insig'),
                 controls = 'Conv.',
                 restriction = column)%>%
          separate(col = term, into = c('judge_char', 'defendant_char'), sep = ':', remove = F)
        
        #save results to master df
        reg_master_df <- reg_master_df %>%
          bind_rows(cur_fit_conv_df)
      }
    }
  }
  
  #set up factors for model types
  reg_master_df$controls <- factor(reg_master_df$controls, levels = c('None', 'Pers.', 'Conv.'))
  
  #if print is set to true
  if(print){
    #loop over unique elements in 'judge_char' variable
    for(v1 in unique(reg_master_df$judge_char)){
      #loop over unique elements in 'defendant_char' variable
      for(v2 in unique(reg_master_df$defendant_char)){
        
        #restrict master df to results for the current judge_char and defendant_char
        cur_df <- reg_master_df %>%
           filter(grepl(v1, term), grepl(v2, term))
        
        if(nrow(cur_df) == 0 | length(unique(cur_df$outcome)) < 1) next
        
        #plot regression results; x-axis is the model type, y-axis is the effect size, color is the significance level,
        #facet is the outcome type
        p <- ggplot(cur_df, aes(x = controls, y = estimate, color = sig_level))+
          geom_point()+
          geom_errorbar(aes(ymin = conf.low, ymax = conf.high))+
          facet_wrap(.~outcome, scales = 'free')+
          labs(x = 'Controls',
               y = 'Effect Size',
               title = paste0(v1, ' x ', v2))
         print(p)
      }
    }
  }
  
  #save master df at fp
  write.csv(reg_master_df, fp)
  #return master df
  return(reg_master_df)
}

##RQ 3c: How do lay judge demographics interact with defendant demographics when it comes to equal treatment?

dir.create('../03_results/exploratory/rq3', showWarnings = F)

#run descriptive_analysis_2d for judge and defendant demographic characteristics
df <- descriptive_analysis_2d(df_short, '../03_results/exploratory/rq3/high_level_descriptives.csv', '', TRUE, judge_chars_descriptive, defendant_chars_descriptive)
## [1] "judge_age_groups: defendant_age_groups"
## [1] "judge_age_groups: defendant_is_woman"
## [1] "judge_age_groups: defendant_some_foreign_background"
## [1] "judge_age_groups: defendant_is_married"
## [1] "judge_age_groups: defendant_net_income_quantile"
## [1] "judge_age_groups: case_is_not_oslo"
## [1] "judge_is_woman: defendant_age_groups"
## [1] "judge_is_woman: defendant_is_woman"
## [1] "judge_is_woman: defendant_some_foreign_background"
## [1] "judge_is_woman: defendant_is_married"
## [1] "judge_is_woman: defendant_net_income_quantile"
## [1] "judge_is_woman: case_is_not_oslo"
## [1] "judge_some_foreign_background: defendant_age_groups"
## [1] "judge_some_foreign_background: defendant_is_woman"
## [1] "judge_some_foreign_background: defendant_some_foreign_background"
## [1] "judge_some_foreign_background: defendant_is_married"
## [1] "judge_some_foreign_background: defendant_net_income_quantile"
## [1] "judge_some_foreign_background: case_is_not_oslo"
## [1] "judge_is_married: defendant_age_groups"
## [1] "judge_is_married: defendant_is_woman"
## [1] "judge_is_married: defendant_some_foreign_background"
## [1] "judge_is_married: defendant_is_married"
## [1] "judge_is_married: defendant_net_income_quantile"
## [1] "judge_is_married: case_is_not_oslo"
## [1] "judge_net_income_quantile: defendant_age_groups"
## [1] "judge_net_income_quantile: defendant_is_woman"
## [1] "judge_net_income_quantile: defendant_some_foreign_background"
## [1] "judge_net_income_quantile: defendant_is_married"
## [1] "judge_net_income_quantile: defendant_net_income_quantile"
## [1] "judge_net_income_quantile: case_is_not_oslo"
## [1] "case_is_not_oslo: defendant_age_groups"
## [1] "case_is_not_oslo: defendant_is_woman"
## [1] "case_is_not_oslo: defendant_some_foreign_background"
## [1] "case_is_not_oslo: defendant_is_married"
## [1] "case_is_not_oslo: defendant_net_income_quantile"

#run regression_analysis_2d for judge and defendant demographic characteristics
reg_df <- regression_analysis_2d(df_short, '../03_results/exploratory/rq3/high_level_regression.csv', '', TRUE, judge_chars_regression, defendant_chars_regression)
## [1] "judge_age: defendant_age"
## [1] "judge_age: defendant_is_woman"
## [1] "judge_age: defendant_some_foreign_background"
## [1] "judge_age: defendant_log_net_income"
## [1] "judge_age: case_is_not_oslo"
## [1] "judge_age: defendant_is_married"
## [1] "judge_is_woman: defendant_age"
## [1] "judge_is_woman: defendant_is_woman"
## [1] "judge_is_woman: defendant_some_foreign_background"
## [1] "judge_is_woman: defendant_log_net_income"
## [1] "judge_is_woman: case_is_not_oslo"
## [1] "judge_is_woman: defendant_is_married"
## [1] "judge_some_foreign_background: defendant_age"
## [1] "judge_some_foreign_background: defendant_is_woman"
## [1] "judge_some_foreign_background: defendant_some_foreign_background"
## [1] "judge_some_foreign_background: defendant_log_net_income"
## [1] "judge_some_foreign_background: case_is_not_oslo"
## [1] "judge_some_foreign_background: defendant_is_married"
## [1] "judge_log_net_income: defendant_age"
## [1] "judge_log_net_income: defendant_is_woman"
## [1] "judge_log_net_income: defendant_some_foreign_background"
## [1] "judge_log_net_income: defendant_log_net_income"
## [1] "judge_log_net_income: case_is_not_oslo"
## [1] "judge_log_net_income: defendant_is_married"
## [1] "judge_is_married: defendant_age"
## [1] "judge_is_married: defendant_is_woman"
## [1] "judge_is_married: defendant_some_foreign_background"
## [1] "judge_is_married: defendant_log_net_income"
## [1] "judge_is_married: case_is_not_oslo"
## [1] "judge_is_married: defendant_is_married"